package com.hc;

import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.hc.domain.City;
import com.hc.domain.Country;
import com.hc.domain.Province;
import com.hc.domain.Town;
import com.hc.domain.Village;
import com.hc.mapper.CityMapper;
import com.hc.mapper.CountryMapper;
import com.hc.mapper.ProvinceMapper;
import com.hc.mapper.TownMapper;
import com.hc.mapper.VillageMapper;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Crawls province / city / county / town / village administrative-division data and stores
 * it in five tables through the MyBatis mappers.
 *
 * <p>The data originates from the National Bureau of Statistics page
 * http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/54/5402.html. The requests in this
 * class are issued against {@code http://localhost:8080/2020/} (presumably a local copy of
 * that site - confirm before running).
 *
 * <p>Every {@code getXxx} method returns strings of the form {@code code*name}. The
 * {@code insertXxx} tests look up the parent row by code, so they must be run top-down:
 * provinces, then cities, counties, towns and finally villages.
 *
 * @author 梁云亮
 */
@Slf4j
@SpringBootTest
public class InitAdd5Tables {

    /** Root URL against which every crawled page path is resolved. */
    private static final String BASE_URL = "http://localhost:8080/2020/";

    /** Separator placed between code and name in the strings returned by the {@code getXxx} methods. */
    private static final String SEPARATOR = "*";

    /** Regular expression that splits a {@code code*name} string back into its two parts. */
    private static final String SEPARATOR_REGEX = "\\*";

    /**
     * Index of the first province processed by {@link #insertVillage()}.
     * NOTE(review): the value 25 skips the first 25 provinces; it looks like a leftover from
     * resuming an interrupted run. Set it to 0 to process every province.
     */
    private static final int VILLAGE_START_PROVINCE_INDEX = 25;

    @Resource
    private ProvinceMapper provinceMapper;

    @Resource
    private CityMapper cityMapper;

    @Resource
    private CountryMapper countryMapper;

    @Resource
    private TownMapper townMapper;

    @Resource
    private VillageMapper villageMapper;

    /**
     * Downloads and parses the page at the given URL.
     *
     * @param url absolute URL of the page
     * @return the parsed document, or {@code null} when the page could not be fetched
     * @throws IllegalArgumentException if {@code url} is {@code null} or empty
     */
    private Document connect(String url) {
        if (url == null || url.isEmpty()) {
            throw new IllegalArgumentException("无效的url");
        }
        try {
            return Jsoup.connect(url).timeout(100 * 1000).get();
        } catch (IOException e) {
            // Best effort: some levels legitimately have no page, so report and carry on.
            // The cause is printed as well because a timeout ends up here too.
            System.out.println(url + "地址不存在: " + e);
            return null;
        }
    }

    /**
     * Fetches {@code BASE_URL + relativePath + ".html"}.
     *
     * @return the parsed page, or {@code null} when it could not be fetched
     */
    private Document fetchPage(String relativePath) {
        return connect(BASE_URL + relativePath + ".html");
    }

    /** Returns the space-separated pieces of the combined text of all {@code td} cells of a row. */
    private static String[] cells(Element row) {
        return row.select("td").text().split(" ");
    }

    /** Builds the page path of a city: {@code <first 2 chars>/<cityCode>}, e.g. {@code 13/1301}. */
    private static String cityPath(String cityCode) {
        return cityCode.substring(0, 2) + "/" + cityCode;
    }

    /** Builds the page path of a county-level code: {@code <chars 0-2>/<chars 2-4>/<code>}. */
    private static String countyPath(String countyCode) {
        return countyCode.substring(0, 2) + "/" + countyCode.substring(2, 4) + "/" + countyCode;
    }

    /** Builds the page path of a town code: {@code <chars 0-2>/<chars 2-4>/<chars 4-6>/<code>}. */
    private static String townPath(String townCode) {
        return townCode.substring(0, 2) + "/" + townCode.substring(2, 4) + "/"
                + townCode.substring(4, 6) + "/" + townCode;
    }

    /**
     * Returns {@code row} unchanged, or fails with a descriptive message when the parent row
     * a child batch needs has not been inserted yet.
     *
     * @throws IllegalStateException if {@code row} is {@code null}
     */
    private static <T> T requireFound(T row, String level, String code) {
        if (row == null) {
            throw new IllegalStateException(
                    "No " + level + " row with code " + code + "; insert the " + level + " level first");
        }
        return row;
    }

    /**
     * Reads all provinces from the index page {@code default.htm}.
     *
     * @return one {@code code*name} entry per province link, where code is the link's href
     *         without its extension; empty when the index page could not be fetched
     */
    public List<String> getProvinces() {
        List<String> res = new ArrayList<>();
        Document page = connect(BASE_URL + "default.htm");
        if (page == null) {
            return res;
        }
        for (Element row : page.select("tr.provincetr")) { // each row holds several provinces
            for (Element link : row.select("a")) { // one link per province
                String href = link.attr("href");
                res.add(href.substring(0, href.lastIndexOf(".")) + SEPARATOR + link.text());
            }
        }
        return res;
    }

    @Test
    public void testGetProvince() {
        getProvinces().forEach(System.out::println);
    }

    /** Inserts every province returned by {@link #getProvinces()} in a single batch. */
    @Test
    void insertProvinces() {
        List<Province> batch = new ArrayList<>();
        for (String entry : getProvinces()) {
            String[] parts = entry.split(SEPARATOR_REGEX);
            batch.add(Province.builder().code(parts[0]).name(parts[1]).build());
        }
        if (batch.isEmpty()) {
            // Nothing was crawled; skip the insert, as insertTown does for empty batches.
            return;
        }
        int res = provinceMapper.batchInsert(batch);
        System.out.println(res);
    }

    /**
     * Reads all cities of a province.
     *
     * @param provinceCode province code, used as the page name
     * @return one {@code code*name} entry per city row, where code is the first 4 characters
     *         of the row's first cell; empty when the page could not be fetched
     */
    public List<String> getCitiesByProvince(String provinceCode) {
        List<String> res = new ArrayList<>();
        Document page = fetchPage(provinceCode);
        if (page == null) {
            return res;
        }
        for (Element row : page.select("tr.citytr")) {
            String[] cells = cells(row);
            res.add(cells[0].substring(0, 4) + SEPARATOR + cells[1]);
        }
        return res;
    }

    @Test
    public void testGetCitiesByProvince() {
        getCitiesByProvince("41").forEach(System.out::println);
    }

    /** Inserts the cities of every province, one batch per province. */
    @Test
    void insertCities() {
        for (String provinceEntry : getProvinces()) {
            String provinceCode = provinceEntry.split(SEPARATOR_REGEX)[0];
            List<String> cityEntries = getCitiesByProvince(provinceCode);
            if (cityEntries.isEmpty()) {
                continue; // nothing to insert for this province
            }
            Province province = requireFound(
                    provinceMapper.selectOne(new QueryWrapper<Province>().eq("code", provinceCode)),
                    "province", provinceCode);
            List<City> batch = new ArrayList<>();
            for (String cityEntry : cityEntries) {
                String[] parts = cityEntry.split(SEPARATOR_REGEX);
                batch.add(City.builder().name(parts[1]).code(parts[0]).provinceId(province.getId()).build());
            }
            // insert province by province
            int res = cityMapper.batchInsert(batch);
            System.out.println(res);
        }
    }

    /**
     * Reads all counties of a city.
     *
     * <p>When the page has no county rows, its town rows are returned instead (the original
     * author's example: Danzhou City in Hainan Province has only four levels and no county
     * level).
     *
     * @param cityCode page path of the city, e.g. {@code 46/4604}
     * @return one {@code code*name} entry per row; code is the first 6 characters of the first
     *         cell for county rows, or the first 9 characters for town rows; empty when the
     *         page could not be fetched
     */
    public List<String> getCountriesByCity(String cityCode) {
        List<String> res = new ArrayList<>();
        Document page = fetchPage(cityCode);
        if (page == null) {
            return res;
        }
        Elements countyRows = page.select("tr.countytr");
        if (countyRows.isEmpty()) {
            // No county level on this page: fall back to the town rows.
            for (Element row : page.select("tr.towntr")) {
                String[] cells = cells(row);
                res.add(cells[0].substring(0, 9) + SEPARATOR + cells[1]);
            }
        } else {
            for (Element row : countyRows) {
                String[] cells = cells(row);
                res.add(cells[0].substring(0, 6) + SEPARATOR + cells[1]);
            }
        }
        return res;
    }

    @Test
    void testGetCountiesByProvince() {
        getCountriesByCity("46/4604").forEach(System.out::println);
    }

    /** Inserts the counties of every city, one batch per city. */
    @Test
    void insertCountry() {
        for (String provinceEntry : getProvinces()) {
            String provinceCode = provinceEntry.split(SEPARATOR_REGEX)[0]; // e.g. 13 (河北省)
            for (String cityEntry : getCitiesByProvince(provinceCode)) {
                String cityCode = cityEntry.split(SEPARATOR_REGEX)[0]; // e.g. 1301 (石家庄市)
                List<String> countyEntries = getCountriesByCity(cityPath(cityCode));
                if (countyEntries.isEmpty()) {
                    continue; // nothing to insert for this city
                }
                City city = requireFound(
                        cityMapper.selectOne(new QueryWrapper<City>().eq("code", cityCode)),
                        "city", cityCode);
                List<Country> batch = new ArrayList<>();
                for (String countyEntry : countyEntries) {
                    String[] parts = countyEntry.split(SEPARATOR_REGEX);
                    System.out.println(parts[0] + " * " + parts[1]);
                    batch.add(Country.builder().name(parts[1]).code(parts[0]).cityId(city.getId()).build());
                }
                int res = countryMapper.batchInsert(batch);
                System.out.println(res);
            }
        }
    }

    /**
     * Reads all towns of a county.
     *
     * <p>When the page has no town rows, its village rows are returned instead, using the
     * second and third space-separated pieces of the row text as code and name.
     *
     * @param countryCode page path of the county, e.g. {@code 41/01/410122}
     * @return one {@code code*name} entry per row; for town rows code is the first 9
     *         characters of the first cell; empty when the page could not be fetched
     */
    public List<String> getTownsByCountry(String countryCode) {
        List<String> res = new ArrayList<>();
        Document page = fetchPage(countryCode);
        if (page == null) {
            return res;
        }
        Elements townRows = page.select("tr.towntr");
        if (townRows.isEmpty()) {
            // Pages reached through the four-level fallback of getCountriesByCity list villages directly.
            for (Element row : page.select("tr.villagetr")) {
                String[] cells = cells(row);
                res.add(cells[1] + SEPARATOR + cells[2]);
            }
        } else {
            for (Element row : townRows) {
                String[] cells = cells(row);
                res.add(cells[0].substring(0, 9) + SEPARATOR + cells[1]);
            }
        }
        return res;
    }

    @Test
    void testGetTownsByCountry() {
        getTownsByCountry("41/01/410122").forEach(System.out::println);
    }

    /** Inserts the towns of every county, one batch per county. */
    @Test
    void insertTown() {
        for (String provinceEntry : getProvinces()) {
            String provinceCode = provinceEntry.split(SEPARATOR_REGEX)[0]; // e.g. 13 (河北省)
            for (String cityEntry : getCitiesByProvince(provinceCode)) {
                String cityCode = cityEntry.split(SEPARATOR_REGEX)[0]; // e.g. 1301 (石家庄市)
                for (String countyEntry : getCountriesByCity(cityPath(cityCode))) {
                    String countyCode = countyEntry.split(SEPARATOR_REGEX)[0]; // e.g. 130324 (卢龙县)
                    List<String> townEntries = getTownsByCountry(countyPath(countyCode));
                    if (townEntries.isEmpty()) {
                        continue; // county without a page or without rows
                    }
                    Country country = requireFound(
                            countryMapper.selectOne(new QueryWrapper<Country>().eq("code", countyCode)),
                            "county", countyCode);
                    List<Town> batch = new ArrayList<>();
                    for (String townEntry : townEntries) {
                        String[] parts = townEntry.split(SEPARATOR_REGEX);
                        batch.add(Town.builder().name(parts[1]).code(parts[0]).countryId(country.getId()).build());
                    }
                    System.out.println(batch);
                    int res = townMapper.batchInsert(batch);
                    System.out.println(res);
                }
            }
        }
    }

    /**
     * Reads all villages of a town.
     *
     * @param townCode page path of the town, e.g. {@code 41/01/22/410122104}
     * @return for every village row, the combined cell text with its first 13 characters
     *         removed (presumably the 12-digit village code and the following space, leaving
     *         {@code <classification code> <name>} - confirm against the page); empty when
     *         the page could not be fetched
     */
    public List<String> getVillagesByCountry(String townCode) {
        List<String> res = new ArrayList<>();
        Document page = fetchPage(townCode);
        if (page == null) {
            return res;
        }
        for (Element row : page.select("tr.villagetr")) {
            String text = row.select("td").text();
            res.add(text.substring(13));
        }
        return res;
    }

    @Test
    void testGetVillagesByCountry() {
        getVillagesByCountry("41/01/22/410122104").forEach(System.out::println);
    }

    /**
     * Inserts the villages of every town, one batch per town, starting at province index
     * {@link #VILLAGE_START_PROVINCE_INDEX}.
     */
    @Test
    void insertVillage() {
        List<String> provinceEntries = getProvinces();
        for (int i = VILLAGE_START_PROVINCE_INDEX; i < provinceEntries.size(); i++) {
            String provinceCode = provinceEntries.get(i).split(SEPARATOR_REGEX)[0]; // e.g. 13 (河北省)
            for (String cityEntry : getCitiesByProvince(provinceCode)) {
                String cityCode = cityEntry.split(SEPARATOR_REGEX)[0]; // e.g. 1301 (石家庄市)
                for (String countyEntry : getCountriesByCity(cityPath(cityCode))) {
                    String countyCode = countyEntry.split(SEPARATOR_REGEX)[0]; // e.g. 130324 (卢龙县)
                    for (String townEntry : getTownsByCountry(countyPath(countyCode))) {
                        String townCode = townEntry.split(SEPARATOR_REGEX)[0]; // e.g. 140802204 (上郭乡)
                        if (townCode.length() == 3) {
                            // Presumably an entry produced by the village-row fallback of
                            // getTownsByCountry; it has no village page of its own.
                            continue;
                        }
                        insertVillagesOfTown(townCode);
                    }
                }
            }
        }
    }

    /** Crawls the villages of one town and inserts them as a single batch. */
    private void insertVillagesOfTown(String townCode) {
        List<String> villageEntries = getVillagesByCountry(townPath(townCode));
        if (villageEntries.isEmpty()) {
            return; // nothing to insert; skip empty batches as insertTown does
        }
        Town town = requireFound(
                townMapper.selectOne(new QueryWrapper<Town>().eq("code", townCode)),
                "town", townCode);
        List<Village> batch = new ArrayList<>();
        for (String villageEntry : villageEntries) {
            // Entries are space separated: first piece is stored as code, second as name.
            String[] parts = villageEntry.split(" ");
            batch.add(Village.builder().name(parts[1]).code(parts[0]).townId(town.getId()).build());
        }
        villageMapper.batchInsert(batch);
    }
}
